# Sources:
# 1. http://en.wikipedia.org/wiki/Spanish_general_election,_2008
# 2. http://en.wikipedia.org/wiki/Category:Electoral_Districts_of_the_Congress_of_Deputies_(Spain)
# 3. # http://www.electionresources.org/es/index_en.html

######################### Load Libraries #######################################

library(foreign)

library(seatsvotes)

############################# Functions ########################################

# modified version of Linzer's swr function for use with pr systems

swrpr <- function (fit, frac, sims = 1000, graph = TRUE) 
{
    starttime <- Sys.time()
    dat <- reconstruct(fit)
    dat[is.na(dat)] <- 0
    np <- ncol(dat)
    votemat <- NULL
    seatmat <- NULL
    for (s in 1:sims) {
        vsim <- sim.election(fit, dat)
        vsim[is.na(vsim)] <- 0
        partyvotes <- vsim[, 1] * vsim[, -1]
        votemat <- rbind(votemat, colSums(partyvotes)/sum(partyvotes))
        seatmat <- rbind(seatmat, pr(vsim,frac))
    }
    ret <- list()
    truevotes <- dat[, 1] * dat[, -1]
    ret$truevote <- colSums(truevotes)/sum(truevotes)
    ret$trueseat <- pr(dat,frac)
    # names(ret$trueseat) <- names(ret$truevote)
    swing <- NULL
    swing.lm <- NULL
    seatlist <- list()
    for (j in 1:(np - 1)) {
        smean.hi <- mean(seatmat[(votemat[, j] < (ret$truevote[j] + 
            0.012)) & (votemat[, j] > (ret$truevote[j] + 0.008)), 
            j])
        smean.lo <- mean(seatmat[(votemat[, j] > (ret$truevote[j] - 
            0.012)) & (votemat[, j] < (ret$truevote[j] - 0.008)), 
            j])
        swing <- c(swing, round((smean.hi - smean.lo)/0.02, 2))
        swing.lm <- c(swing.lm, round(coefficients(lm(seatmat[, 
            j] ~ votemat[, j]))[2], 2))
        vrange <- seq(min(floor(votemat[, j] * 100)), max(floor(votemat[, 
            j] * 100)), 1)/100
        seatlist[[j]] <- cbind(vrange, matrix(NA, nrow = length(vrange), 
            ncol = 3))
        colnames(seatlist[[j]]) <- c("vote", "mean", "lower", 
            "upper")
        for (i in 1:length(vrange)) {
            sel <- seatmat[(votemat[, j] >= (vrange[i] - 0.002)) & 
                (votemat[, j] < (vrange[i] + 0.002)), j]
            if (length(sel) < 10) {
                seatlist[[j]][i, 2:4] <- NA
            }
            else {
                seatlist[[j]][i, 2] <- mean(sel)
                seatlist[[j]][i, 3:4] <- quantile(sel, probs = c(0.025, 
                  0.975))
            }
        }
    }
    if (graph) {
        par(mar = c(4.4, 4.1, 0.5, 0.5), mfrow = c(1, 1))
       pdf("swrpr_esp.pdf")
        plot(ret$truevote[!is.na(swing)], ret$trueseat[!is.na(swing)], 
            pch = 19, cex = 1.2, xlim = c(0, 0.7), ylim = c(0, 
                0.7), xlab = "Party vote share", ylab = "Party seat share", 
            cex.lab = 1.5, xaxt = "n", yaxt = "n")
        axis(1, seq(0, 1, 0.1), seq(0, 1, 0.1), cex.axis = 1.5)
        axis(2, seq(0, 1, 0.1), seq(0, 1, 0.1), cex.axis = 1.5)
        for (j in 1:(np - 1)) {
            if (!is.na(swing[j])) {
                lines(seatlist[[j]][, 1], seatlist[[j]][, 2], 
                  lwd = 2.5)
                lines(seatlist[[j]][, 1], seatlist[[j]][, 3], 
                  lwd = 2, lty = 3)
                lines(seatlist[[j]][, 1], seatlist[[j]][, 4], 
                  lwd = 2, lty = 3)
                text(ret$truevote[j] + 0.01, ret$trueseat[j], 
                  paste(colnames(dat)[j + 1], ": ", swing[j], 
                    sep = ""), cex = 1.2, pos = 4)
            }
        }
        dev.off()
    }
    vdiff <- as.data.frame(t(t(votemat) - colMeans(votemat)))
    sdiff <- as.data.frame(t(t(seatmat) - colMeans(seatmat)))
    swing.median <- round(apply(sdiff/vdiff, 2, median), 2)
    names(seatlist) <- colnames(votemat) <- colnames(seatmat) <- names(ret$truevote)
    names(swing) <- names(swing.lm) <- names(swing.median) <- names(ret$truevote)
    swingtab <- cbind(round(100 * ret$truevote, 1), round(100 * 
        ret$trueseat, 1), swing)
    rownames(swingtab) <- colnames(dat[, -1])
    colnames(swingtab) <- c("Votes", "Seats", "Swing ratio")
    print(swingtab)
    ret$votemat <- votemat
    ret$seatmat <- seatmat
    ret$swing <- swing
    ret$swing.lm <- swing.lm
    ret$swing.median <- swing.median
    ret$vs <- seatlist
    ret$time <- Sys.time() - starttime
    return(ret)
}

# add swrpr function to Linzer's seatsvotes package
environment(swrpr) <- asNamespace("seatsvotes")

# new pr function modeling Spanish electoral rules 
pr <-
function(votes,frac) {

	votes <- votes[,1]*votes[,-1]
	
	stotal <- as.data.frame(matrix(NA, dim(votes)[1], dim(votes)[2]))
	
	for ( j in 1:dim(votes)[1] ) {
	
		d <- as.data.frame(matrix(NA, frac[j], dim(votes)[2]))	
	
			for (i in 1:frac[j]) {

				d[i,] <- votes[j,]
	
			}

		d <- d/seq(1,frac[j],by=1)
		
		seatrank <- as.vector(apply(d,2,cbind))

		seatrank <- sort(seatrank,decreasing=TRUE)

		stotal[j,] <- colSums(ifelse(d>=seatrank[frac[j]],1,0))
		
		}

		return(colSums(stotal)/sum(frac))

}

# add pr function to Linzer's seatsvotes package
environment(pr) <- asNamespace("seatsvotes")

############################ Data Setup ########################################

############################# Spain ############################################

# change to directory with relevant electoral data
setwd("~/_data/electoral/esp")

# load data for 1996 Spanish election 
spain96dm <- read.dta("spain96_v2.dta")

# generate frac variable for swrpr call
frac <- spain96dm$mag

# drop mag (district magnitude variable)
esp96 <- spain96dm[,-5]

############################ Analysis ##########################################

############################# Spain ############################################

################################## SM Figure 3 #################################

# set random number seed for replicability
set.seed(1234)

# set working directory for graphs; a pdf of the graph will be generated as part of the swrpr function 
setwd("~/_supportingmaterials_tabfig/")

##########
## 1996 ##
##########

# generate data frame for relevant election year
esp96 <- data.frame(Total=esp96$Total,
                   PSOE=esp96$pvs123,
                   PP=esp96$pvs120,
                   IU=esp96$pvs15058)

# set vote share to 0, where threshold was not reached
esp96[which(esp96[,4]<0.03 & esp96[,4]>0),4] <- rep(0,length(which(esp96[,4]<0.03 & esp96[,4]>0)))

# drop Barcelona and Madrid from data set, as they distort simulations
esp96 <- esp96[-c(8,29),]

# drop Barcelona and Madrid from frac vector
frac <- frac[-c(8,29)]

# find contestation patterns (seatsvotes packages).
esp96.split <- findpatterns(esp96)

# estimate mixture model based on contestation patterns identified in Step 2 (seatsvotes package)
fit96 <- list()
fit96[[1]] <- mvnmix(dat=esp96.split[[1]],components=1,nrep=10,scatter=T)
fit96[[2]] <- mvnmix(dat=esp96.split[[2]],components=2,nrep=10,scatter=T)

# verify fit of mixture model estimated in Step 3 (seatsvotes package)
show.marginals(fit96,numdraws=50000)

# simulate seats-votes elasticities (seatsvotes package using modified functions - see above)
sr96 <- swrpr(fit96, frac, sims=10000)

